# Paths
path_to_metadata <- "../01-cellranger_mapping/data/tonsil_atlas_metadata.csv"
path_to_save_obj <- "../results"
rds_obj <- str_c(
path_to_save_obj,
"seurat_object_cite_seq_postQC.rds",
sep = "/"
)
path_to_scrublet <- "../02-scrublet_doublet_detection/results"
path_to_vireo_genotype <- "../03-demultiplexing_genotype/2-vireo"
path_to_cellranger_output <- "../01-cellranger_mapping/projects"
min_count_33 <- 600
max_count_33 <- 12000
min_feat_33 <- 300
max_feat_33 <- 3000
max_mt_33 <- 15
min_count_38 <- 300
max_count_38 <- 6000
min_feat_38 <- 200
max_feat_38 <- 2000
max_mt_38 <- 15
min_count_40 <- 400
max_count_40 <- 6000
min_feat_40 <- 250
max_feat_40 <- 2250
max_mt_40 <- 10
min_count_46 <- 350
max_count_46 <- 6000
min_feat_46 <- 300
max_feat_46 <- 2500
max_mt_46 <- 15
min_counts <- c(min_count_33, min_count_38, min_count_40, min_count_46)
max_counts <- c(max_count_33, max_count_38, max_count_40, max_count_46)
min_feats <- c(min_feat_33, min_feat_38, min_feat_40, min_feat_46)
max_feats <- c(max_feat_33, max_feat_38, max_feat_40, max_feat_46)
max_mt <- c(max_mt_33, max_mt_38, max_mt_40, max_mt_46)
max_genes <- 10
GEX and CITEseq (ADT) dataset is integrated to set up the seurat object for the project
## Rows: 60 Columns: 6
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): subproject, gem_id, library_name, type, donor_id
## dbl (1): library_id
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
##
## BCLLATLAS_33_ifZOgenn_TpMNTvBa BCLLATLAS_33_mLuLpVxi_v0fLyotc
## 2947 2649
## BCLLATLAS_38_B20O1bh7_VmM99YZJ BCLLATLAS_38_rdFRFhrU_ZdYeOZlf
## 2269 2565
## BCLLATLAS_38_WToIzInl_LudU7hVX BCLLATLAS_40_CfdzDgHe_IMOTbrIP
## 1855 2799
## BCLLATLAS_40_LxBTpkPO_8TcNpBg4 BCLLATLAS_40_pseMjZsU_qgZNOhOQ
## 3016 3022
## BCLLATLAS_40_ujxNn2kq_lG2VLlYd BCLLATLAS_40_uqJAc4r9_BYScOzxA
## 3098 3125
## BCLLATLAS_46_BZEECBEG_GXkc6Q1y BCLLATLAS_46_HjqdPU0E_aGDmEY5F
## 4191 3166
## BCLLATLAS_46_KETfaLdx_Ub1mtE13 BCLLATLAS_46_SOJZt9kY_qpnv20QN
## 4498 3574
## BCLLATLAS_46_XV1SLOR2_HRF5D9A3
## 3571
bcll.combined[["percent.mt"]] <- PercentageFeatureSet(bcll.combined, pattern = "^MT-")
bcll.combined$log10GenesPerUMI <- log10(bcll.combined$nFeature_RNA) / log10(bcll.combined$nCount_RNA)
subproject_seurat_obj_list <- SplitObject(bcll.combined, split.by = "subproject")
QC_summary <- bcll.combined@meta.data %>% group_by(orig.ident) %>% summarise(Total_cells = n(), Mean_RNA_count= mean(nCount_RNA), Mean_feature_count = mean(nFeature_RNA), Mean_mito_percent = mean(percent.mt), Mean_log10GenesPerUMI = mean(log10GenesPerUMI) ,Number_of_scrublet_doublet = sum(scrublet_predicted_doublet == "True"), Number_of_genotype_doublet = sum(genotype_based_doublet_flag == "T") , Number_of_genotype_unassigned = sum(genotype_based_unassigned_flag == "T") ,Number_of_TCR = sum(tcr_flag == "T", na.rm = T) , Number_of_BCR = sum(bcr_flag == "T", na.rm = T) , Number_of_mait_cells = sum(mait_evidence %in% c("TRB:gene","TRA:gene+junction;TRB:gene","TRA:gene","TRA:gene;TRB:gene","TRA:junction;TRB:gene","TRA:junction","TRA:gene+junction"), na.rm = T), Number_of_inkt_cells =sum(inkt_evidence %in% c("TRB:gene","TRA:gene;TRB:gene","TRA:gene"), na.rm = T), .groups = 'drop')
metadata <- bcll.combined@meta.data
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
kable(QC_summary) %>%
kable_styling("striped", full_width = T)
| orig.ident | Total_cells | Mean_RNA_count | Mean_feature_count | Mean_mito_percent | Mean_log10GenesPerUMI | Number_of_scrublet_doublet | Number_of_genotype_doublet | Number_of_genotype_unassigned | Number_of_TCR | Number_of_BCR | Number_of_mait_cells | Number_of_inkt_cells |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BCLLATLAS_33_ifZOgenn_TpMNTvBa | 2947 | 2372.930 | 979.3444 | 3.265162 | 0.8934118 | 4 | 0 | 0 | 330 | 1166 | 79 | 3 |
| BCLLATLAS_33_mLuLpVxi_v0fLyotc | 2649 | 3043.896 | 1163.6436 | 3.196555 | 0.8888973 | 1 | 0 | 0 | 304 | 1362 | 72 | 0 |
| BCLLATLAS_38_B20O1bh7_VmM99YZJ | 2269 | 1016.181 | 496.9912 | 5.202306 | 0.9012692 | 3 | 127 | 150 | 206 | 590 | 49 | 1 |
| BCLLATLAS_38_rdFRFhrU_ZdYeOZlf | 2565 | 1306.379 | 601.7649 | 4.409707 | 0.8988821 | 2 | 175 | 96 | 237 | 822 | 56 | 2 |
| BCLLATLAS_38_WToIzInl_LudU7hVX | 1855 | 1020.876 | 492.7003 | 4.742993 | 0.9001380 | 1 | 87 | 71 | 316 | 464 | 59 | 2 |
| BCLLATLAS_40_CfdzDgHe_IMOTbrIP | 2799 | 1472.776 | 662.8910 | 2.992291 | 0.8992132 | 7 | 0 | 0 | 280 | 900 | 52 | 3 |
| BCLLATLAS_40_LxBTpkPO_8TcNpBg4 | 3016 | 1452.220 | 687.0802 | 2.667517 | 0.9061708 | 1 | 0 | 0 | 290 | 1078 | 66 | 1 |
| BCLLATLAS_40_pseMjZsU_qgZNOhOQ | 3022 | 1386.118 | 645.4596 | 2.803393 | 0.9016561 | 9 | 0 | 0 | 293 | 1059 | 66 | 6 |
| BCLLATLAS_40_ujxNn2kq_lG2VLlYd | 3098 | 1270.212 | 615.1394 | 2.843212 | 0.9071128 | 1 | 0 | 0 | 288 | 1014 | 70 | 2 |
| BCLLATLAS_40_uqJAc4r9_BYScOzxA | 3125 | 1670.094 | 754.3158 | 2.619818 | 0.9019308 | 4 | 0 | 0 | 297 | 1038 | 61 | 3 |
| BCLLATLAS_46_BZEECBEG_GXkc6Q1y | 4191 | 1653.451 | 624.2014 | 4.290005 | 0.8797365 | 230 | 0 | 0 | 1249 | 989 | 293 | 12 |
| BCLLATLAS_46_HjqdPU0E_aGDmEY5F | 3166 | 1390.643 | 596.7274 | 3.802479 | 0.8916281 | 5 | 0 | 0 | 720 | 503 | 158 | 5 |
| BCLLATLAS_46_KETfaLdx_Ub1mtE13 | 4498 | 1678.511 | 666.0965 | 3.997394 | 0.8866078 | 179 | 0 | 0 | 1626 | 1476 | 383 | 10 |
| BCLLATLAS_46_SOJZt9kY_qpnv20QN | 3574 | 1770.673 | 712.5193 | 3.834415 | 0.8884291 | 4 | 0 | 0 | 817 | 761 | 212 | 3 |
| BCLLATLAS_46_XV1SLOR2_HRF5D9A3 | 3571 | 1915.572 | 778.2716 | 3.465346 | 0.8927950 | 10 | 0 | 0 | 1005 | 1072 | 232 | 8 |
Cross table to check the doublet called by scrublet and by the genotype package
table(metadata$donor_id, metadata$scrublet_predicted_doublet)
##
## False True
## B20O1bh7_VmM99YZJ_donor0 950 0
## B20O1bh7_VmM99YZJ_donor1 255 1
## B20O1bh7_VmM99YZJ_donor2 461 0
## B20O1bh7_VmM99YZJ_donor3 325 0
## B20O1bh7_VmM99YZJ_doublet 125 2
## B20O1bh7_VmM99YZJ_unassigned 150 0
## BCLL-15-T 18572 428
## BCLL-8-T 5591 5
## BCLL-9-T 15038 22
## rdFRFhrU_ZdYeOZlf_donor0 344 0
## rdFRFhrU_ZdYeOZlf_donor1 326 0
## rdFRFhrU_ZdYeOZlf_donor2 1068 0
## rdFRFhrU_ZdYeOZlf_donor3 556 0
## rdFRFhrU_ZdYeOZlf_doublet 173 2
## rdFRFhrU_ZdYeOZlf_unassigned 96 0
## WToIzInl_LudU7hVX_donor0 418 0
## WToIzInl_LudU7hVX_donor1 319 0
## WToIzInl_LudU7hVX_donor2 572 0
## WToIzInl_LudU7hVX_donor3 388 0
## WToIzInl_LudU7hVX_doublet 86 1
## WToIzInl_LudU7hVX_unassigned 71 0
The cell counts are determined by the number of unique cellular barcodes detected.
# Visualize the number of cell counts per sample
plot_cell_count <- function(obj){
metadata_subset <- obj@meta.data
p1 <- metadata_subset %>%
ggplot(aes(x=sample, fill=sample)) +
geom_bar() +
theme_classic() +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells") +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
}
plot_list <- lapply(subproject_seurat_obj_list, plot_cell_count)
plot_list[1]
## $BCLLATLAS_33
plot_list[2]
## $BCLLATLAS_38
plot_list[3]
## $BCLLATLAS_40
plot_list[4]
## $BCLLATLAS_46
# Visualize the number Feature per cell
feature_per_cell_plot <- function(x){
obj <- subproject_seurat_obj_list[[x]]
metadata_subset <- obj@meta.data
p2 <- metadata_subset %>%
ggplot(aes(color=sample, x=nFeature_RNA, fill= sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = c(min_feats[[x]], max_feats[[x]]))
}
vec=c(1,2,3,4)
plot_list <- lapply(vec,feature_per_cell_plot)
plot_list[1]
## [[1]]
plot_list[2]
## [[1]]
plot_list[3]
## [[1]]
plot_list[4]
## [[1]]
# Visualize the number Gene per cell
gene_per_cell_plot <- function(x){
obj <- subproject_seurat_obj_list[[x]]
metadata_subset <- obj@meta.data
p3 <- metadata_subset %>%
ggplot(aes(color=sample, x=nCount_RNA, fill= sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = c(min_counts[[x]], max_counts[[x]]))
}
plot_list <- lapply(vec, gene_per_cell_plot)
plot_list[1]
## [[1]]
plot_list[2]
## [[1]]
plot_list[3]
## [[1]]
plot_list[4]
## [[1]]
# Visualize the distribution of genes detected per cell via boxplot
gene_per_cell_boxplot <- function(obj){
metadata_subset <- obj@meta.data
p4 <- metadata_subset %>%
ggplot(aes(x=sample, y=log10(nCount_RNA), fill=sample)) +
geom_boxplot() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells vs NGenes")
}
plot_list <- lapply(subproject_seurat_obj_list, gene_per_cell_boxplot)
plot_list[1]
## $BCLLATLAS_33
plot_list[2]
## $BCLLATLAS_38
plot_list[3]
## $BCLLATLAS_40
plot_list[4]
## $BCLLATLAS_46
# Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs
correlation_plot <- function(obj){
metadata_subset <- obj@meta.data
p5 <- metadata_subset %>%
ggplot(aes(x=nFeature_RNA, y=nCount_RNA, color=percent.mt)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~sample)
}
plot_list <- lapply(subproject_seurat_obj_list, correlation_plot)
plot_list[1]
## $BCLLATLAS_33
## `geom_smooth()` using formula 'y ~ x'
plot_list[2]
## $BCLLATLAS_38
## `geom_smooth()` using formula 'y ~ x'
plot_list[3]
## $BCLLATLAS_40
## `geom_smooth()` using formula 'y ~ x'
plot_list[4]
## $BCLLATLAS_46
## `geom_smooth()` using formula 'y ~ x'
# Visualize the distribution of mitochondrial gene expression detected per cell
mt_plot <- function(x){
obj <- subproject_seurat_obj_list[[x]]
metadata_subset <- obj@meta.data
p6 <- metadata_subset %>%
ggplot(aes(color=sample, x=percent.mt, fill=sample)) +
geom_density(alpha = 15) +
theme_classic() +
geom_vline(xintercept = max_mt[[x]])
}
vec <- c(1,2,3,4)
plot_list <- lapply(vec, mt_plot)
plot_list[1]
## [[1]]
plot_list[2]
## [[1]]
plot_list[3]
## [[1]]
plot_list[4]
## [[1]]
# Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI
complexity_plot <- function(obj){
metadata_subset <- obj@meta.data
p7 <- metadata_subset %>%
ggplot(aes(x=log10GenesPerUMI, color = sample, fill=sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
geom_vline(xintercept = 0.8)
}
plot_list <- lapply(subproject_seurat_obj_list, complexity_plot)
plot_list[1]
## $BCLLATLAS_33
plot_list[2]
## $BCLLATLAS_38
plot_list[3]
## $BCLLATLAS_40
plot_list[4]
## $BCLLATLAS_46
VlnPlot(bcll.combined, features = "nCount_ADT",
log = TRUE, pt.size = 0) + NoLegend()
VlnPlot(bcll.combined, features = "nCount_ADT",log = FALSE, pt.size = 0, y.max = 5000) + NoLegend()
## Warning: Removed 554 rows containing non-finite values (stat_ydensity).
VlnPlot(bcll.combined, features = "nCount_RNA", log = TRUE, pt.size = 0) + NoLegend()
VlnPlot(bcll.combined, features = "percent.mt",
log = TRUE, pt.size = 0) + NoLegend()
VlnPlot(bcll.combined, features = "scrublet_doublet_scores",
log = TRUE, pt.size = 0) + NoLegend()
# Filter out low quality reads using selected thresholds - these will change with experiment
subset_seurat_obj <- function(x) {
subset_seurat <- subset(subproject_seurat_obj_list[[x]], subset = nFeature_RNA > min_feats[[x]] & nFeature_RNA < max_feats[[x]] & percent.mt < max_mt[[x]] & nCount_RNA > min_counts[[x]] & nCount_RNA < max_counts[[x]])
return(subset_seurat)
}
subset_seurat_obj
## function(x) {
## subset_seurat <- subset(subproject_seurat_obj_list[[x]], subset = nFeature_RNA > min_feats[[x]] & nFeature_RNA < max_feats[[x]] & percent.mt < max_mt[[x]] & nCount_RNA > min_counts[[x]] & nCount_RNA < max_counts[[x]])
## return(subset_seurat)
## }
vec=c(1,2,3,4)
filtered_subproject_seurat_obj_list <- lapply(vec, subset_seurat_obj)
filtered_bcll.combined <- Reduce(merge, filtered_subproject_seurat_obj_list)
# Output a logical vector for every gene on whether the more than zero counts per cell
# Extract counts
counts <- GetAssayData(object = filtered_bcll.combined, slot = "counts")
# Output a logical vector for every gene on whether the more than zero counts per cell
nonzero <- counts > 0
# Sums all TRUE values and returns TRUE if more than max_genes TRUE values per gene
keep_genes <- Matrix::rowSums(nonzero) >= max_genes
# Only keeping those genes expressed in more than max_genes cells
filtered_counts <- counts[keep_genes, ]
# Reassign to filtered Seurat object
filtered_bcll.combined <- CreateSeuratObject(filtered_counts, meta.data = filtered_bcll.combined@meta.data)
cells.use = rownames(filtered_bcll.combined@meta.data)
filtered_bcll.combined[["ADT"]] <- subset(bcll.combined[["ADT"]], cells = cells.use)
# Save filtered subset to new metadata
metadata_clean <- filtered_bcll.combined@meta.data
Re_QC_summary <- filtered_bcll.combined@meta.data %>% group_by(orig.ident) %>% summarise(Total_cells = n(), Mean_RNA_count= mean(nCount_RNA), Mean_feature_count = mean(nFeature_RNA), Mean_mito_percent = mean(percent.mt), Mean_log10GenesPerUMI = mean(log10GenesPerUMI), Number_of_scrublet_doublet = sum(scrublet_predicted_doublet == "True"), Number_of_genotype_doublet = sum(genotype_based_doublet_flag == "T") , Number_of_genotype_unassigned = sum(genotype_based_unassigned_flag == "T"), Number_of_TCR = sum(tcr_flag == "T", na.rm = T) , Number_of_BCR = sum(bcr_flag == "T", na.rm = T) , Number_of_mait_cells = sum(mait_evidence %in% c("TRB:gene","TRA:gene+junction;TRB:gene","TRA:gene","TRA:gene;TRB:gene","TRA:junction;TRB:gene","TRA:junction","TRA:gene+junction"), na.rm = T), Number_of_inkt_cells =sum(inkt_evidence %in% c("TRB:gene","TRA:gene;TRB:gene","TRA:gene"), na.rm = T), .groups = 'drop')
library(knitr)
library(kableExtra)
kable(Re_QC_summary) %>%
kable_styling("striped", full_width = T)
| orig.ident | Total_cells | Mean_RNA_count | Mean_feature_count | Mean_mito_percent | Mean_log10GenesPerUMI | Number_of_scrublet_doublet | Number_of_genotype_doublet | Number_of_genotype_unassigned | Number_of_TCR | Number_of_BCR | Number_of_mait_cells | Number_of_inkt_cells |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BCLLATLAS_33_ifZOgenn_TpMNTvBa | 2833 | 2331.804 | 979.4328 | 2.977751 | 0.8957582 | 3 | 0 | 0 | 317 | 1133 | 76 | 3 |
| BCLLATLAS_33_mLuLpVxi_v0fLyotc | 2506 | 2873.893 | 1140.8432 | 2.732257 | 0.8918550 | 1 | 0 | 0 | 292 | 1315 | 68 | 0 |
| BCLLATLAS_38_B20O1bh7_VmM99YZJ | 2151 | 1007.170 | 503.5807 | 4.717720 | 0.9080851 | 3 | 123 | 116 | 205 | 544 | 49 | 1 |
| BCLLATLAS_38_rdFRFhrU_ZdYeOZlf | 2477 | 1259.698 | 594.3181 | 4.136973 | 0.9024274 | 2 | 163 | 79 | 232 | 782 | 56 | 2 |
| BCLLATLAS_38_WToIzInl_LudU7hVX | 1797 | 1011.050 | 494.9399 | 4.320309 | 0.9050298 | 1 | 85 | 48 | 313 | 451 | 58 | 2 |
| BCLLATLAS_40_CfdzDgHe_IMOTbrIP | 2679 | 1421.684 | 656.7962 | 2.639688 | 0.9027553 | 7 | 0 | 0 | 278 | 852 | 52 | 3 |
| BCLLATLAS_40_LxBTpkPO_8TcNpBg4 | 2904 | 1401.250 | 681.2307 | 2.423353 | 0.9094013 | 1 | 0 | 0 | 285 | 1031 | 66 | 1 |
| BCLLATLAS_40_pseMjZsU_qgZNOhOQ | 2897 | 1366.661 | 650.6966 | 2.520651 | 0.9058407 | 8 | 0 | 0 | 291 | 1015 | 66 | 6 |
| BCLLATLAS_40_ujxNn2kq_lG2VLlYd | 2961 | 1244.923 | 618.9976 | 2.604378 | 0.9114260 | 1 | 0 | 0 | 282 | 963 | 68 | 2 |
| BCLLATLAS_40_uqJAc4r9_BYScOzxA | 2992 | 1591.382 | 741.9171 | 2.340719 | 0.9046704 | 3 | 0 | 0 | 291 | 980 | 61 | 3 |
| BCLLATLAS_46_BZEECBEG_GXkc6Q1y | 3941 | 1510.374 | 612.3644 | 3.693103 | 0.8822999 | 224 | 0 | 0 | 1221 | 874 | 288 | 12 |
| BCLLATLAS_46_HjqdPU0E_aGDmEY5F | 2969 | 1289.834 | 587.0882 | 3.489705 | 0.8962578 | 5 | 0 | 0 | 707 | 425 | 155 | 5 |
| BCLLATLAS_46_KETfaLdx_Ub1mtE13 | 4236 | 1510.265 | 645.6591 | 3.506280 | 0.8900121 | 176 | 0 | 0 | 1592 | 1326 | 375 | 9 |
| BCLLATLAS_46_SOJZt9kY_qpnv20QN | 3312 | 1579.972 | 688.8919 | 3.409893 | 0.8933073 | 4 | 0 | 0 | 797 | 631 | 205 | 3 |
| BCLLATLAS_46_XV1SLOR2_HRF5D9A3 | 3331 | 1661.469 | 738.5749 | 3.173167 | 0.8967846 | 9 | 0 | 0 | 983 | 915 | 229 | 7 |
# Visualize the number Feature per cell
metadata_clean %>%
ggplot(aes(color=sample, x=nFeature_RNA, fill= sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 2500)
# Visualize the number Gene per cell
metadata_clean %>%
ggplot(aes(color=sample, x=nCount_RNA, fill= sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 200)
# Visualize the distribution of mitochondrial gene expression detected per cell
metadata_clean %>%
ggplot(aes(color=sample, x=percent.mt, fill=sample)) +
geom_density(alpha = 15) +
theme_classic() +
geom_vline(xintercept = 20)
Cross table to check the doublet called by scrublet and by the genotype package
table(metadata_clean$donor_id, metadata_clean$scrublet_predicted_doublet)
##
## False True
## B20O1bh7_VmM99YZJ_donor0 933 0
## B20O1bh7_VmM99YZJ_donor1 238 1
## B20O1bh7_VmM99YZJ_donor2 427 0
## B20O1bh7_VmM99YZJ_donor3 313 0
## B20O1bh7_VmM99YZJ_doublet 121 2
## B20O1bh7_VmM99YZJ_unassigned 116 0
## BCLL-15-T 17371 418
## BCLL-8-T 5335 4
## BCLL-9-T 14413 20
## rdFRFhrU_ZdYeOZlf_donor0 331 0
## rdFRFhrU_ZdYeOZlf_donor1 314 0
## rdFRFhrU_ZdYeOZlf_donor2 1049 0
## rdFRFhrU_ZdYeOZlf_donor3 541 0
## rdFRFhrU_ZdYeOZlf_doublet 161 2
## rdFRFhrU_ZdYeOZlf_unassigned 79 0
## WToIzInl_LudU7hVX_donor0 399 0
## WToIzInl_LudU7hVX_donor1 318 0
## WToIzInl_LudU7hVX_donor2 568 0
## WToIzInl_LudU7hVX_donor3 379 0
## WToIzInl_LudU7hVX_doublet 84 1
## WToIzInl_LudU7hVX_unassigned 48 0
sprintf("Total Cells : %i", length(rownames(metadata)))
## [1] "Total Cells : 46345"
sprintf("Total Cells post QC: %i", length(rownames(metadata_clean)))
## [1] "Total Cells post QC: 43986"
sprintf("T Cells (containing TCR): %i", length(rownames(subset(metadata_clean, metadata_clean$tcr_flag == "T"))))
## [1] "T Cells (containing TCR): 8086"
sprintf("B Cells (containing BCR) : %i", length(rownames(subset(metadata_clean, metadata_clean$bcr_flag == "T"))))
## [1] "B Cells (containing BCR) : 13237"
sprintf("Scrublet Doublet : %i", length(rownames(subset(metadata_clean, metadata_clean$scrublet_predicted_doublet == "True"))))
## [1] "Scrublet Doublet : 448"
sprintf("Genotype Doublet : %i", length(rownames(subset(metadata_clean, metadata_clean$genotype_based_doublet_flag == "T"))))
## [1] "Genotype Doublet : 371"
sprintf("Genotype Unassigned : %i", length(rownames(subset(metadata_clean, metadata_clean$genotype_based_unassigned_flag == "T"))))
## [1] "Genotype Unassigned : 243"
saveRDS(filtered_bcll.combined, file = rds_obj)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.3.4 gridExtra_2.3 forcats_0.5.1 stringr_1.4.0
## [5] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.6
## [9] tidyverse_1.3.1 ggplot2_3.3.5 dplyr_1.0.8 SeuratObject_4.0.4
## [13] Seurat_4.1.0 knitr_1.37
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.4.1 systemfonts_1.0.4
## [4] plyr_1.8.6 igraph_1.2.11 lazyeval_0.2.2
## [7] splines_4.0.3 listenv_0.8.0 scattermore_0.8
## [10] digest_0.6.29 htmltools_0.5.2 fansi_1.0.2
## [13] magrittr_2.0.2 tensor_1.5 cluster_2.1.2
## [16] ROCR_1.0-11 tzdb_0.2.0 globals_0.14.0
## [19] modelr_0.1.8 matrixStats_0.61.0 vroom_1.5.7
## [22] svglite_2.1.0 spatstat.sparse_2.1-0 colorspace_2.0-3
## [25] rvest_1.0.2 ggrepel_0.9.1 haven_2.4.3
## [28] xfun_0.30 crayon_1.5.0 jsonlite_1.8.0
## [31] spatstat.data_2.1-2 survival_3.3-1 zoo_1.8-9
## [34] glue_1.6.2 polyclip_1.10-0 gtable_0.3.0
## [37] webshot_0.5.2 leiden_0.3.9 future.apply_1.8.1
## [40] abind_1.4-5 scales_1.1.1 DBI_1.1.2
## [43] spatstat.random_2.1-0 miniUI_0.1.1.1 Rcpp_1.0.8.3
## [46] viridisLite_0.4.0 xtable_1.8-4 reticulate_1.24
## [49] spatstat.core_2.4-0 bit_4.0.4 htmlwidgets_1.5.4
## [52] httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.2
## [55] ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3
## [58] sass_0.4.0 uwot_0.1.11 dbplyr_2.1.1
## [61] deldir_1.0-6 utf8_1.2.2 labeling_0.4.2
## [64] tidyselect_1.1.2 rlang_1.0.2 reshape2_1.4.4
## [67] later_1.3.0 munsell_0.5.0 cellranger_1.1.0
## [70] tools_4.0.3 cli_3.2.0 generics_0.1.2
## [73] broom_0.7.12 ggridges_0.5.3 evaluate_0.15
## [76] fastmap_1.1.0 yaml_2.3.5 goftest_1.2-3
## [79] bit64_4.0.5 fs_1.5.2 fitdistrplus_1.1-8
## [82] RANN_2.6.1 pbapply_1.5-0 future_1.24.0
## [85] nlme_3.1-155 mime_0.12 xml2_1.3.3
## [88] compiler_4.0.3 rstudioapi_0.13 plotly_4.10.0
## [91] png_0.1-7 spatstat.utils_2.3-0 reprex_2.0.1
## [94] bslib_0.3.1 stringi_1.7.6 highr_0.9
## [97] lattice_0.20-45 Matrix_1.4-0 vctrs_0.3.8
## [100] pillar_1.7.0 lifecycle_1.0.1 spatstat.geom_2.3-2
## [103] lmtest_0.9-39 jquerylib_0.1.4 RcppAnnoy_0.0.19
## [106] data.table_1.14.2 cowplot_1.1.1 irlba_2.3.5
## [109] httpuv_1.6.5 patchwork_1.1.1 R6_2.5.1
## [112] promises_1.2.0.1 KernSmooth_2.23-20 parallelly_1.30.0
## [115] codetools_0.2-18 MASS_7.3-55 assertthat_0.2.1
## [118] withr_2.5.0 sctransform_0.3.3 mgcv_1.8-39
## [121] parallel_4.0.3 hms_1.1.1 grid_4.0.3
## [124] rpart_4.1.16 rmarkdown_2.13 Rtsne_0.15
## [127] shiny_1.7.1 lubridate_1.8.0